library(tidyverse)
library(minfi)
library(limma)
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
library(minfi)
library(DMRcate)
# call helpers
source("../helper-scripts/DMP-utility-functions.R")
source("../helper-scripts/plotting-functions.R")
source("../helper-scripts/probe-annotation.R")
source("../helper-scripts/shared-helpers.R")
# knitr settings
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
# Load Data
load(
"../../arsenic-epigenetics-meta/Chile/buccal/data/buccal_funnorm_data.RData"
)
pheno <- as.data.frame(pheno)
# load ReFACTor output, join with pheno
refactor_components <- read_tsv(
"preprocessing-reports/refactor_buccal.out.components.txt",
col_names = FALSE
)
colnames(refactor_components) <- paste0("refactor", seq_len(6))
pheno <- cbind(pheno, refactor_components)
# Look at exposure distribution
exp_dist_p <- pheno %>% ggplot(aes(exposed)) +
geom_histogram(alpha = 0.7, stat = "count", binwidth = 50) +
xlab("Exposed?") +
ylab("Count") +
ggtitle("Distribution of Arsenic Exposure") +
theme_minimal()
ggsave(filename = "EWAS-plots/arsenic-exp-dist.png", device = "png", dpi = 300)
exp_dist_p
# Unadjusted model matrix
modUnadj <- model.matrix(~pheno$exposed)
# Adjusted for cell type
modCell <- model.matrix(~pheno$exposed + pheno$refactor1 + pheno$refactor2 +
pheno$refactor3 + pheno$refactor4 + pheno$refactor5 +
pheno$refactor6)
# Adjusted for age, smoking, and sex
modAgeSMSex <- model.matrix(~pheno$exposed + pheno$age + pheno$smoking +
pheno$sex)
# Adjusted for age, smoking, sex, anc cell composition
modCellAgeSMSex <- model.matrix(~pheno$exposed + pheno$age + pheno$smoking +
pheno$sex + pheno$refactor1 + pheno$refactor2 +
pheno$refactor3 + pheno$refactor4 +
pheno$refactor5 + pheno$refactor6)
# Unadjusted
probeUnadj <- run_DMP(mvals = mvals_fun, design = modUnadj)
# no singificant CpG sites with BH correction
table(probeUnadj$P.Value < 0.05)
table(probeUnadj$adj.P.Val < 0.05)
# write sites with singificant raw p-values
write.table(
probeUnadj,
file = "EWAS-results/unadjusted.txt",
sep = "\t",
row.names = FALSE,
col.names = TRUE
)
# plot results
gg_qqplot(probeUnadj$P.Value)
quartz.save("EWAS-plots/qq_log_unadjusted.png", type = "png", dpi = 300)
volcano(probeUnadj)
quartz.save("EWAS-plots/vol_log_unadjusted.png", type = "png", dpi = 300)
manhattan(probeUnadj, Locations)
quartz.save("EWAS-plots/man_log_unadjusted.png", type = "png", dpi = 300)
# Adjusted for cell type
probeCell <- run_DMP(mvals = mvals_fun, design = modCell)
# no singificant CpG sites with BH correction
table(probeCell$P.Value < 0.05)
table(probeCell$adj.P.Val < 0.05)
# save the results
write.table(
probeCell,
file = "EWAS-results/cell.txt",
sep = "\t",
row.names = FALSE,
col.names = TRUE
)
# save the results
gg_qqplot(probeCell$P.Value)
quartz.save("EWAS-plots/qq_log_cell.png", type = "png", dpi = 300)
volcano(probeCell)
quartz.save("EWAS-plots/vol_log_cell.png", type = "png", dpi = 300)
manhattan(probeCell, Locations)
quartz.save("EWAS-plots/man_log_cell.png", type = "png", dpi = 300)
# Adjusted for age, smoking, and sex
probeAgeSmSex <- run_DMP(mvals = mvals_fun, design = modAgeSMSex)
# no singificant CpG sites with BH correction
table(probeAgeSmSex$P.Value < 0.05)
table(probeAgeSmSex$adj.P.Val < 0.05)
# save the results
write.table(
probeAgeSmSex,
file = "EWAS-results/age-sex-smoking.txt",
sep = "\t",
row.names = FALSE,
col.names = TRUE
)
# plot results
gg_qqplot(probeAgeSmSex$P.Value)
quartz.save("EWAS-plots/qq_log_sex-age-smoking.png", type = "png", dpi = 300)
volcano(probeAgeSmSex)
quartz.save("EWAS-plots/vol_log_sex-age-smoking.png", type = "png", dpi = 300)
manhattan(probeAgeSmSex, Locations)
quartz.save("EWAS-plots/man_log_sex-age-smoking.png", type = "png", dpi = 300)
# Adjusted for cell type, age, smoking, and sex
probeCellAgeSmSex <- run_DMP(mvals = mvals_fun, design = modCellAgeSMSex)
# no singificant CpG sites with BH correction
table(probeCellAgeSmSex$P.Value < 0.05)
table(probeCellAgeSmSex$adj.P.Val < 0.05)
# save the results
write.table(
probeCellAgeSmSex,
file = "EWAS-results/cell-age-sex-smoking.txt",
sep = "\t",
row.names = FALSE,
col.names = TRUE
)
# plot results
gg_qqplot(probeCellAgeSmSex$P.Value)
quartz.save("EWAS-plots/qq_log_cell-sex-age-smoking.png", type = "png",
dpi = 300)
volcano(probeCellAgeSmSex)
quartz.save("EWAS-plots/vol_log_cell-sex-age-smoking.png", type = "png",
dpi = 300)
manhattan(probeCellAgeSmSex, Locations)
quartz.save("EWAS-plots/man_log_cell-sex-age-smoking.png", type = "png",
dpi = 300)
knitr::include_graphics("EWAS-plots/qq_log_unadjusted.png")
This model doesn’t appear to be accurate.
knitr::include_graphics("EWAS-plots/qq_log_sex-age-smoking.png")
knitr::include_graphics("EWAS-plots/vol_log_sex-age-smoking.png")
knitr::include_graphics("EWAS-plots/man_log_sex-age-smoking.png")
This model appears to be accurate. However, based on this plot, there probably
won’t be any significant p-value after controlling for multiple testing.
knitr::include_graphics("EWAS-plots/qq_log_cell-sex-age-smoking.png")
knitr::include_graphics("EWAS-plots/vol_log_cell-sex-age-smoking.png")
knitr::include_graphics("EWAS-plots/man_log_cell-sex-age-smoking.png")